This markdown file is used for the map and the inspection EDA.
Step 1: Import the DOHMH Childcare Center Inspections dataset (updated November 25, 2022) from NYC OpenData.
Because names were entered inconsistently, the child care records contain a lot of duplication: the same child care center can appear under different names in separate records because of typos. We wanted to calculate the number of violations for each child care center more accurately, so we converted the legal names to lowercase, removed punctuation and spaces, and stripped substrings that could cause inconsistencies, so that records belonging to the same legal name are merged.
# Count how many records share each normalised legal name.
# Normalisation: lowercase the name, drop every run of punctuation/spaces,
# then strip a fixed list of substrings one after another. The order is
# kept because removing one substring can expose another.
# NOTE(review): "th" and "i" are removed wherever they occur in a name,
# not only as standalone words -- confirm this is intended.
children_center <-
  raw_data %>%
  janitor::clean_names() %>%
  mutate(
    legal_name = Reduce(
      function(name, token) gsub(token, "", name),
      c("llc", "inc", "th", "school", "i", "center", "ctr"),
      gsub("[[:punct:] ]+", "", tolower(legal_name))
    )
  ) %>%
  count(legal_name, name = "n_obs")
We located the violations recorded for New York’s child care centers by zip code, so that the map can show the viewer more clearly how many violations were recorded for child care centers in each area of New York.
# Number of records per zip code, counting only rows where both the zip code
# and the violation category are present.
total_obs <-
  raw_data %>%
  janitor::clean_names() %>%
  drop_na(zip_code, violation_category) %>%
  count(zip_code, name = "n_obs")

# Print the per-zip-code counts.
total_obs
## # A tibble: 175 × 2
## zip_code n_obs
## <dbl> <int>
## 1 10001 86
## 2 10002 150
## 3 10003 87
## 4 10004 30
## 5 10005 11
## 6 10007 28
## 7 10009 25
## 8 10010 44
## 9 10011 82
## 10 10012 15
## # … with 165 more rows
Map
#install.packages("rgdal")
#install.packages("maps")
#install.packages("BAMMtools")
#install.packages("spdep")
#install.packages("maptools")
library(tidyverse)
library(plotly)
library(rgdal)
library(plotly)
library(maps)
library(devtools)
library(leaflet)
library(BAMMtools)
library(spdep)
library(maptools)
# Zip code lookup table, with a numeric `zip_code` key derived from `ZIP` so
# it can be joined to `total_obs`.
zipcode <-
  read_csv("./data/US Zip Codes from 2013 Government Data.csv") %>%
  mutate(zip_code = as.numeric(ZIP))

# Per-zip-code counts enriched with the lookup columns. The key is converted
# to character afterwards, which is the type used when joining to the
# shapefile attributes.
zipcode_lat <-
  total_obs %>%
  left_join(zipcode, by = "zip_code") %>%
  mutate(zip_code = as.character(zip_code))
# Zip code tabulation area boundaries:
# https://data.cityofnewyork.us/Business/Zip-Code-Boundaries/i8iw-xf4u/data
# Reference for the mapping workflow:
# https://plotly-book.cpsievert.me/maps.html
# Read the zip code boundary shapefile.
zip_map <- readOGR(
  dsn = "./data/ZIP_CODE_040114/ZIP_CODE_040114.shp",
  encoding = "UTF-8"
)
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/huafa/Desktop/P8105/final_project.github.io/data/ZIP_CODE_040114/ZIP_CODE_040114.shp", layer: "ZIP_CODE_040114"
## with 263 features
## It has 12 fields
# Attach the per-zip-code counts to the polygon attributes; zip codes that
# have no match in the child care violation data get a count of 0.
zip_map@data <-
  zip_map@data %>%
  left_join(zipcode_lat, by = c("ZIPCODE" = "zip_code")) %>%
  mutate(n_obs = replace(n_obs, is.na(n_obs), 0))
# Reproject the polygons to EPSG:4326 for the leaflet map.
zip_map_crs <- spTransform(zip_map, CRS("+init=epsg:4326"))
# Optional GeoJSON export (left disabled):
# writeOGR(zip_map_crs, './data/zip_map_geojson', layer = 'zip_map', driver = 'GeoJSON')
# Popup layout: one HTML snippet per polygon showing its zip code and its
# violation count, built from the shapefile attribute table.
label_popup <- with(
  zip_map@data,
  paste0(
    "<strong>Zip code: </strong>", ZIPCODE,
    "<br><strong>Number of Violation: </strong>", n_obs
  )
)
# Colour bins for the violation choropleth.
# The bins are the Jenks natural breaks of the per-zip-code violation counts
# (6 breaks -> 5 classes). Previously the breaks were computed but discarded
# in favour of hard-coded bins ending at 260, so every zip code with more
# than 260 violations (the data go up to 413) fell outside the colour scale
# and was drawn with the NA colour.
# unique() guards against repeated break values, which colorBin() cannot
# use as bin edges.
# The `bite_*` names are kept unchanged because other code refers to them.
bite_bins <- unique(getJenksBreaks(zip_map$n_obs, 6))
bite_bins
## [1] 0 31 90 169 255 413
# Not used in the code shown here; kept as-is in case it is referenced
# elsewhere.
pit_bins <- c(0, 4, 14, 25, 41, 80)
# Green sequential palette over the violation bins; values that cannot be
# binned are drawn with `na.color`.
bite_pal <- colorBin('Greens', bins = bite_bins, na.color = '#aaff56')
# Choropleth of violation counts by zip code: polygons are filled from
# `bite_pal`, show `label_popup` when clicked, and are outlined in black
# while highlighted.
leaflet::leaflet(data = zip_map_crs) %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~bite_pal(n_obs),
    fillOpacity = 0.8,
    color = "#BDBDC3",
    weight = 1,
    popup = label_popup,
    highlightOptions = highlightOptions(
      color = "black",
      weight = 2,
      bringToFront = TRUE
    )
  ) %>%
  addLegend(
    position = "bottomleft",
    pal = bite_pal,
    values = ~n_obs,
    title = "Number of violation incidents by zip code",
    opacity = 1
  )